/*
Updated: March 15, 2023
*/

cls
clear all
local dir "~\Dropbox\Working Papers\Distinction Effect"
cd "`dir'"

/*******************************************************************************
			1. Universe : College Graduates between 2001 - 2015-I
********************************************************************************/

import delimited "Data\Originals\OLE\laboral_egresados_2001_2015.csv", encoding("utf-8")  stringc(_all) clear

rename idprograma ole_programcode
rename idmodalidad ole_programlevel
rename idarea ole_programarea
rename idnucleo ole_programfield
rename primernombre ole_name1
rename segundonombre ole_name2
rename primerapellido ole_surname1
rename segundoapellido ole_surname2
rename fechanacimiento birthdate
rename genero gender
rename idtipodocumento national_id_type
rename numdocumento national_id

gen ole_gradtime = anio_grado+semestre_grado

keep national_id_type national_id ole_name1 ole_name2 ole_surname1 ole_surname2 birthdate gender
duplicates drop national_id_type national_id, force
save "Data\Intermediates\OLE", replace 


/*******************************************************************************
			2. PILA Files Yearly Files
********************************************************************************/

/***************
	2009
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2009.txt", clear

rename tipo_identificacion_aportante pila_firm_type
rename numero_identificacion_aportante pila_firm_id
rename razon pila_firm_name 
rename codigo_ac pila_ciiu
rename tipo_identificacion national_id_type
rename numero_identificacion national_id
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if length(national_id) <= 4
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_*)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
replace periodo_salud = substr(periodo_salud, 1, 10)
gen aux = date(periodo_salud, "YMD")
format aux %td
gen pila_earnings_month = month(aux)
drop aux

//Data Level : Worker-Payment
egen double dias = rowmax(dias_*)
bys national* *codigo : egen double aux = max(dias)
keep if aux == dias //565
drop aux dias

egen double ibc = rowmax(ibc_*)
bys national* *codigo : egen double aux = max(ibc)
keep if aux == ibc //1415
drop aux ibc

duplicates drop national* *codigo, force //97

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc*)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 496900 if pila_earnings > (2/3)*496900 &  pila_earnings < 496900 //1,063
replace pila_earnings = 25*496900 if pila_earnings > 25*496900 //409

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //28,527
replace pila_earnings = . if pila_earnings < (2/3)*496900 //87
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2009.dta", replace

/***************
	2010
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2010.csv", clear

rename tipoidentificacionaportante pila_firm_type
rename identificacionaportante pila_firm_id
rename razon pila_firm_name 
rename actividadeconomicacd pila_ciiu
rename tipoidentificacioncd national_id_type
rename identificacionpersonaid national_id
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if national_id <= 9999
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_*)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
replace periodo_salud = substr(periodo_salud, 1, 10)
gen aux = date(periodo_salud, "YMD")
format aux %td
gen pila_earnings_month = month(aux)
drop aux

//Data Level : Worker-Payment
egen double dias = rowmax(dias_*)
bys national* *numeroplanilla : egen double aux = max(dias)
keep if aux == dias //49
drop aux dias

egen double ibc = rowmax(ibc_*)
bys national* *numeroplanilla : egen double aux = max(ibc)
keep if aux == ibc //251
drop aux ibc

duplicates drop national* *numeroplanilla, force //8

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc*)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 515000 if pila_earnings > (2/3)*515000 &  pila_earnings < 515000 //621
replace pila_earnings = 25*515000 if pila_earnings > 25*515000 //894

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //29,098
replace pila_earnings = . if pila_earnings < (2/3)*515000 //484
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2010.dta", replace

/***************
	2011
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2011.csv", clear

rename tipoidentificacionaportante pila_firm_type
rename identificacionaportante pila_firm_id
rename razon pila_firm_name 
rename actividadeconomicacd pila_ciiu
rename tipoidentificaciondesc national_id_type
rename identificacionpersonaid national_id
rename primernombre pila_name1
rename segundonombre pila_name2
rename primerapellido pila_surname1
rename segundoapellido pila_surname2
rename tipocotizantecd pila_worker_type
rename numerodiascotizados dias

//Updates Entry/Withdrawal
replace ing = "" if ing == "NULL"
replace ing = trim(itrim(upper(ing)))
replace ret = "" if ret == "NULL"
replace ret = trim(itrim(upper(ret)))
gen updates = ing != "" | ret != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if national_id <= 9999
//Drop Payments < 12 days
destring dias, replace force
drop if dias < 12 & updates == 0

//Payment Month
replace periodosalud = "" if periodosalud == "NULL"
replace periodoresto = "" if periodoresto == "NULL"
gen pila_earnings_month = substr(periodosalud, 5, 2)
replace pila_earnings_month = substr(periodoresto, 5, 2) if pila_earnings_month == ""
destring pila_earnings_month, replace

//Data Level : Worker-Payment
bys national* *planillaid : egen double aux = max(dias)
keep if aux == dias //0
drop aux

destring ibc*, replace force
egen double ibc = rowmax(ibc*)
bys national* *planillaid : egen double aux = max(ibc)
keep if aux == ibc //8
drop aux ibc

duplicates drop national* *planillaid, force //0

//Data Level: Worker-Firm
//The File Needs Additional Cleaning. Consider the following case
br if national_id == 1010177895 //Representative of many cases

foreach i in salud pension ccf {
	bys national* pila_firm* : egen ibc_`i' = max(ibc`i')
}
bys national* pila_firm* : egen aux = max(dias)
keep if dias == aux //0
drop aux
 
egen ibc = rowmax(ibcsalud ibcpension ibcccf)
bys national* pila_firm* : egen aux = max(ibc)
keep if ibc == aux //279,395
drop aux ibcsalud ibcpension ibcccf

duplicates drop national* pila_firm*, force
 
//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	gen double aux_`i' = ibc_`i'*30/dias if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
bys national* : egen aux_dias =  total(dias) if updates == 1
replace aux_dias = 30 if aux_dias >= 31
replace aux_dias = . if aux_dias < 12
foreach i in salud pension ccf {
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .
drop aux_ibc*

//Drop Duplicates for Payments with Updates
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux

bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

//Days
rename dias pila_days

duplicates drop national*, force

//Workers' City (Municipality Code)
rename municipiocd pila_muni
destring pila_muni, replace force

//Truncation
replace pila_earnings = 535600 if pila_earnings > (2/3)*535600 &  pila_earnings < 535600 //716
replace pila_earnings = 25*535600 if pila_earnings > 25*535600 //9,178 

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //37,041
replace pila_earnings = . if pila_earnings < (2/3)*535600 //4
*drop if pila_earnings == . 

keep national* pila* updates

replace national_id_type = "CC" if regexm(national_id_type, "CIUDADA")
replace national_id_type = "CE" if regexm(national_id_type, "EXTRANJ")
replace national_id_type = "TI" if regexm(national_id_type, "IDENT")

save "Data\Intermediates\Pila 2011.dta", replace

/***************
	2012
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2012.csv", clear

rename tipoidentificacionaportante pila_firm_type
rename identificacionaportante pila_firm_id
rename razon pila_firm_name 
rename actividadeconomicacd pila_ciiu
rename tipoidentificaciondesc national_id_type
rename identificacionpersonaid national_id
rename primernombre pila_name1
rename segundonombre pila_name2
rename primerapellido pila_surname1
rename segundoapellido pila_surname2
rename tipocotizantecd pila_worker_type
rename numerodiascotizados dias

//Updates Entry/Withdrawal
replace ing = "" if ing == "NULL"
replace ing = trim(itrim(upper(ing)))
replace ret = "" if ret == "NULL"
replace ret = trim(itrim(upper(ret)))
gen updates = ing != "" | ret != ""
drop ing ret

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force

rename pila_worker_type aux
bys national* *planillaid: egen pila_worker_type = mode(aux)
drop aux

drop if pila_worker_type == .
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if national_id <= 9999
//Drop Payments < 12 days
drop if dias < 12 & updates == 0

//Payment Month
tostring fechapagoid, replace 
gen pila_earnings_month = substr(fechapagoid, 5, 2)
destring pila_earnings_month, replace

//Data Level : Worker-Payment
bys national* *planillaid tipoaporteid : egen double aux = max(dias)
keep if aux == dias //3,551
drop aux

bys national* *planillaid tipoaporteid : egen double aux = max(ibc)
keep if aux == ibc //9,621
drop aux

duplicates drop national* *planillaid tipoaporteid, force //84

rename updates aux
bys national* *planillaid: egen updates = mode(aux)
drop aux
rename municipiocd aux
bys national* *planillaid: egen municipiocd = mode(aux)
drop aux

gen aux = "_salud" if tipoaporteid == 7
replace aux = "_pension" if tipoaporteid == 3
replace aux = "_ccf" if tipoaporteid == 11
drop tipoaporteid
reshape wide dias ibc, i(national* *planillaid) j(aux) string

//Data Level : Worker-Firm
egen ibc = rowmax(ibc*)
bys national* pila_firm* : egen aux = max(ibc)
keep if ibc == aux //279,395
drop aux

duplicates drop national* pila_firm*, force

//Monthly Earnings
drop if updates == .
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_`i' = . if (dias_`i' < 12 | dias_`i' > 31) & updates == 0
	replace dias_`i' = 30 if dias_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename municipiocd pila_muni

//Truncation
replace pila_earnings = 566700 if pila_earnings > (2/3)*566700 &  pila_earnings < 566700 //1,063
replace pila_earnings = 25*566700 if pila_earnings > 25*566700 //409

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //28,527
replace pila_earnings = . if pila_earnings < (2/3)*566700 //87
*drop if pila_earnings == . 

keep national* pila* updates

replace national_id_type = "CC" if regexm(national_id_type, "CIUDADA")
replace national_id_type = "CE" if regexm(national_id_type, "EXTRANJ")
replace national_id_type = "TI" if regexm(national_id_type, "IDENT")

save "Data\Intermediates\Pila 2012.dta", replace

/***************
	2013
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2013.csv", clear

rename tipo_documento_aportante pila_firm_type
rename numero_documento_aportante pila_firm_id
rename razon_social pila_firm_name 
rename codigo_actividad_economica pila_ciiu
rename tipo_identificacion national_id_type
rename numero_identificacion national_id
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if length(national_id) <= 4
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_*)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
replace periodo_salud = substr(periodo_salud, 1, 10)
gen aux = date(periodo_salud, "YMD")
format aux %td
gen pila_earnings_month = month(aux)
drop aux

//Data Level : Worker-Payment
egen double dias = rowmax(dias_*)
bys national* numero_planilla : egen double aux = max(dias)
keep if aux == dias //49
drop aux dias

egen double ibc = rowmax(ibc_*)
bys national* numero_planilla : egen double aux = max(ibc)
keep if aux == ibc //251
drop aux ibc

duplicates drop national* numero_planilla, force //8

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc*)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 589500 if pila_earnings > (2/3)*589500 &  pila_earnings < 589500 //621
replace pila_earnings = 25*589500 if pila_earnings > 25*589500 //894

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //29,098
replace pila_earnings = . if pila_earnings < (2/3)*589500 //484
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2013.dta", replace

/***************
	2014
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2014.csv", clear

rename tipo_documento_aportante pila_firm_type
rename numero_documento_aportante pila_firm_id
rename razon_social pila_firm_name 
rename codigo_actividad_economica pila_ciiu
rename tipo_identificacion national_id_type
rename numero_identificacion national_id
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if national_id <= 9999
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_*)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
replace periodo_salud = substr(periodo_salud, 1, 10)
gen aux = date(periodo_salud, "YMD")
format aux %td
gen pila_earnings_month = month(aux)
drop aux

//Data Level : Worker-Payment
egen double dias = rowmax(dias_*)
bys national* numero_planilla : egen double aux = max(dias)
keep if aux == dias //49
drop aux dias

egen double ibc = rowmax(ibc_*)
bys national* numero_planilla : egen double aux = max(ibc)
keep if aux == ibc //251
drop aux ibc

duplicates drop national* numero_planilla, force //8

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc*)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 616000 if pila_earnings > (2/3)*616000 &  pila_earnings < 616000 //621
replace pila_earnings = 25*616000 if pila_earnings > 25*616000 //894

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //29,098
replace pila_earnings = . if pila_earnings < (2/3)*616000 //484
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2014.dta", replace

/***************
	2015
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2015.csv", clear

rename tipo_identificacion_ap pila_firm_type
rename numero_identificacion_ap pila_firm_id
rename razon_social pila_firm_name 
rename codigo_actividad_economica pila_ciiu
rename tipo_identificacion national_id_type
rename numero_identificacion national_id
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if national_id <= 9999
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_*)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
gen pila_earnings_month = substr(periodo_salud, 6, 2)
destring pila_earnings_month, replace

//Data Level : Worker-Payment
egen double dias = rowmax(dias_*)
bys national* numero_planilla : egen double aux = max(dias)
keep if aux == dias //49
drop aux dias

egen double ibc = rowmax(ibc_*)
bys national* numero_planilla : egen double aux = max(ibc)
keep if aux == ibc //251
drop aux ibc

duplicates drop national* numero_planilla, force //8

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias*)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc*)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_*) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 644350 if pila_earnings > (2/3)*644350 &  pila_earnings < 644350 //621
replace pila_earnings = 25*644350 if pila_earnings > 25*644350 //894

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //29,098
replace pila_earnings = . if pila_earnings < (2/3)*644350 //484
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2015.dta", replace

/***************
	2016
***************/
import delimited "Data\Originals\OLE\Pila Egresados 2016.csv", clear

rename tipo_identificacion_ap pila_firm_type
rename numero_identificacion_ap pila_firm_id
rename razon_social pila_firm_name 
rename codigo_actividad_economica pila_ciiu
rename tipo_identificacion national_id_type
rename numero_identificacion national_id
rename primer_nombre pila_name1
rename segundo_nombre pila_name2
rename primer_apellido pila_surname1
rename segundo_apellido pila_surname2
rename tipo_cotizante pila_worker_type

//Updates Entry/Withdrawal
replace ingreso = "" if ingreso == "SIN NOVEDAD" | ingreso == "NA"
replace ingreso = trim(itrim(upper(ingreso)))
replace retiro = "" if retiro == "SIN NOVEDAD" | retiro == "NA"
replace retiro = trim(itrim(upper(retiro)))
gen updates = ingreso != "" | retiro != ""

//Data Restrinctions
//Worker Type
destring pila_worker_type, replace force
# delimit ;
keep if pila_worker_type == 1 | pila_worker_type == 3 | pila_worker_type == 16 
| pila_worker_type == 19 | pila_worker_type == 22 | pila_worker_type == 30 
| pila_worker_type == 31 | pila_worker_type == 47
; # delimit cr
//Incorrect Woker's ID
drop if length(national_id) <= 4
//Drop Payments < 12 days
destring dias*, replace force
egen double dias = rowmax(dias_cot_salud dias_cot_pension dias_cot_ccf)
drop if dias < 12 & updates == 0
drop dias

//Payment Month
gen pila_earnings_month = substr(periodo_salud, 6, 2)
destring pila_earnings_month, replace

//Data Level : Worker-Payment
egen double dias = rowmax(dias_cot_salud dias_cot_pension dias_cot_ccf)
bys national* numero_planilla : egen double aux = max(dias)
keep if aux == dias //49
drop aux dias

destring ibc_salud ibc_pension ibc_ccf, replace force
egen double ibc = rowmax(ibc_salud ibc_pension ibc_ccf)
bys national* numero_planilla : egen double aux = max(ibc)
keep if aux == ibc //251
drop aux ibc

duplicates drop national* numero_planilla, force //8

//Monthly Earnings
//1. Payments Without Updates
foreach i in salud pension ccf {
	replace dias_cot_`i' = . if (dias_cot_`i' < 12 | dias_cot_`i' > 31) & updates == 0
	replace dias_cot_`i' = 30 if dias_cot_`i' == 31 & updates == 0
	gen double aux_`i' = ibc_`i'*30/dias_cot_`i' if updates == 0
	replace aux_`i' = . if aux_`i' <= 0
}
gen double pila_earnings = aux_salud 
replace pila_earnings = aux_pension if pila_earnings == .
replace pila_earnings = aux_ccf if pila_earnings == .
drop aux_*

//2. Payments with Updates 
foreach i in salud pension ccf {
	bys national* : egen aux_dias_`i' =  total(dias_cot_`i') if updates == 1
	replace aux_dias_`i' = 30 if aux_dias_`i' >= 31
	replace aux_dias_`i' = . if aux_dias_`i' < 12	
	bys national* : egen aux_ibc_`i' =  total(ibc_`i') if updates == 1
	replace aux_ibc_`i' = aux_ibc_`i'*30/aux_dias_`i'
	replace aux_ibc_`i' = . if aux_ibc_`i' <= 0	
}
replace pila_earnings = aux_ibc_salud if pila_earnings == .
replace pila_earnings = aux_ibc_pension if pila_earnings == .
replace pila_earnings = aux_ibc_ccf if pila_earnings == .

//Drop Duplicates for Payments with Updates (to Avoid Double Sum)
egen double dias = rowmax(dias_cot_salud dias_cot_pension dias_cot_ccf)
bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 1
drop aux 

egen double ibc = rowmax(ibc_salud ibc_pension ibc_ccf)
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 1
drop aux

duplicates drop national* if updates == 1, force

//Total Earnings by Worker
rename pila_earnings aux
bys national* : egen double pila_earnings = total(aux)
replace pila_earnings = round(pila_earnings, 1)
drop aux

//Days
egen double pila_days = rowmax(dias_cot_salud dias_cot_pension dias_cot_ccf) if updates == 0
egen aux = rowmax(aux_dias_*) if updates == 1
replace pila_days = aux if updates == 1
drop aux*

//Information at the Worker Level
//Drop Duplicates for Payments without Updates
bys national* : egen double aux = max(ibc)
drop if aux != ibc & updates == 0
drop aux

bys national* : egen double aux = max(dias)
drop if aux != dias & updates == 0
drop aux 

duplicates drop national* if updates == 0, force

//Drop Remaining Duplicates at Worker Level
bys national* : egen double aux = max(ibc)
drop if aux != ibc
drop aux

duplicates drop national*, force

//Workers' City (Municipality Code)
destring departamento municipio, replace force
rename departamento codigo_dpto
rename municipio codigo_muni
merge m:1 codigo_dpto codigo_muni using "Data\Originals\DANE\Llave Divipola.dta", keepusing(codmunicipio)
keep if _merge == 3 | _merge == 1
drop codigo_* _merge
rename codmunicipio pila_muni

//Truncation
replace pila_earnings = 689455 if pila_earnings > (2/3)*689455 &  pila_earnings < 689455 //621
replace pila_earnings = 25*689455 if pila_earnings > 25*689455 //894

//Final Data Restrictions
//No earnings
replace pila_earnings = . if pila_earnings == 0 //29,098
replace pila_earnings = . if pila_earnings < (2/3)*689455 //484
*drop if pila_earnings == . 

keep national* pila* updates

save "Data\Intermediates\Pila 2016.dta", replace

/*******************************************************************************
			2. PILA Files Yearly Files
********************************************************************************/

//Cleaning
global S pila_firm_name pila_surname1 pila_surname2 pila_name1 pila_name2
foreach year of numlist 2009(1)2016 {
	use "Data\Intermediates\Pila `year'.dta", clear
    
	destring pila_ciiu, replace force
	tostring pila_firm_id national_id, replace format(%25.0g)
	
	rename pila_firm_name aux 
	gen pila_firm_name = aux
	drop aux
	
	spell $S, clean($S) up($S)

	replace national_id_type = upper(national_id_type)	
	
	duplicates drop national*, force
	
	gen pila = 1
	
	compress 
	order pila national_id* pila_name? pila_surname? pila_worker_type updates pila_days pila_earnings* pila_ciiu pila_muni pila_firm*

	foreach var of varlist pila* updates {
		rename `var' `var'_`year'
	}	
	
	save "Data\Intermediates\Pila `year'.dta", replace
}

//Merge to OLE Universe
use "Data\Intermediates\OLE.dta", clear

foreach year of numlist 2009(1)2016 {
	merge 1:1 national* using "Data\Intermediates\Pila `year'.dta"
	keep if _merge == 3 | _merge == 1
	drop _merge
}

save "Data\Intermediates\Pila_2009_2016.dta", replace
